URLs http://rpubs.com/adam_dennett/443357
http://egallic.fr/R/sKDE/smooth-maps/kde.html

libraries

#install.packages("downloader")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0.9000     ✔ purrr   0.2.5     
## ✔ tibble  1.4.2          ✔ dplyr   0.7.6     
## ✔ tidyr   0.8.1          ✔ stringr 1.3.1     
## ✔ readr   1.1.1          ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(downloader)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.4.4
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.4.4
## rgdal: version: 1.3-4, (SVN revision 766)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
##  Linking to sp version: 1.3-1
library(sf)
## Warning: package 'sf' was built under R version 3.4.4
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.4.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#library(highcharter)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.4.4

Get data

# Read the London ward boundaries from an ESRI shapefile with readOGR().
# NOTE(review): this is an absolute path under /Volumes, presumably a volume
# mounted on the author's machine; point it at a local copy before running.
LondonWards <- readOGR("/Volumes/ucfnnap/CASA_GIS and Sc/wk8/NewLondonWard/NewLondonWard.shp", layer="NewLondonWard")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Volumes/ucfnnap/CASA_GIS and Sc/wk8/NewLondonWard/NewLondonWard.shp", layer: "NewLondonWard"
## with 625 features
## It has 76 fields
## Integer64 fields read as strings:  x y
# Convert the object returned by readOGR() to an sf object; every later step
# works on LondonWardsSF.
LondonWardsSF <- st_as_sf(LondonWards)
# Download the additional ward-level attributes (CSV) that are merged onto the
# ward polygons in the next step.
extradata <- read_csv("https://www.dropbox.com/s/qay9q1jwpffxcqj/LondonAdditionalDataFixed.csv?raw=1")
## Parsed with column specification:
## cols(
##   WardName = col_character(),
##   WardCode = col_character(),
##   Wardcode = col_character(),
##   PctSharedOwnership2011 = col_double(),
##   PctRentFree2011 = col_double(),
##   Candidate = col_character(),
##   InnerOuter = col_character(),
##   x = col_double(),
##   y = col_double(),
##   AvgGCSE2011 = col_double(),
##   UnauthAbsenceSchools11 = col_double()
## )

Merge datasets

# Attach the CSV attributes to the ward polygons, matching the shapefile's
# WD11CD code against the CSV's Wardcode column. Columns present in both
# inputs come back with .x / .y suffixes.
LondonWardsSF <- merge(
  x = LondonWardsSF,
  y = extradata,
  by.x = "WD11CD",
  by.y = "Wardcode"
)
# Quick per-column overview of the merged table
summary(object = LondonWardsSF)
##        WD11CD       WD11CDO               WD11NM    WD11NMW   
##  E05000026:  1   00AA   :  1   Village       :  3   NA's:625  
##  E05000027:  1   00ABFX :  1   Abbey         :  2             
##  E05000028:  1   00ABFY :  1   Alexandra     :  2             
##  E05000029:  1   00ABFZ :  1   Barnhill      :  2             
##  E05000030:  1   00ABGA :  1   Belmont       :  2             
##  E05000031:  1   00ABGB :  1   Brunswick Park:  2             
##  (Other)  :619   (Other):619   (Other)       :612             
##                                  WardName.x    WardCode.x      Wardcode1  
##  Barking and Dagenham - Abbey         :  1   00AA   :  1   E05000026:  1  
##  Barking and Dagenham - Alibon        :  1   00ABFX :  1   E05000027:  1  
##  Barking and Dagenham - Becontree     :  1   00ABFY :  1   E05000028:  1  
##  Barking and Dagenham - Chadwell Heath:  1   00ABFZ :  1   E05000029:  1  
##  Barking and Dagenham - Eastbrook     :  1   00ABGA :  1   E05000030:  1  
##  Barking and Dagenham - Eastbury      :  1   00ABGB :  1   (Other)  :619  
##  (Other)                              :619   (Other):619   NA's     :  1  
##    PopCensus2       Aged0_15      Aged16_64       Aged65plus  
##  Min.   : 5110   Min.   : 620   Min.   : 3056   Min.   : 431  
##  1st Qu.:11197   1st Qu.:2041   1st Qu.: 7536   1st Qu.:1104  
##  Median :12979   Median :2517   Median : 9024   Median :1338  
##  Mean   :13078   Mean   :2600   Mean   : 9031   Mean   :1448  
##  3rd Qu.:14862   3rd Qu.:3084   3rd Qu.:10388   3rd Qu.:1667  
##  Max.   :23084   Max.   :5652   Max.   :18688   Max.   :3364  
##                                                               
##    PctAged0_1       PctAged16_      PctAged65p       MeanAge201   
##  Min.   : 7.134   Min.   :58.38   Min.   : 3.483   Min.   :29.00  
##  1st Qu.:17.523   1st Qu.:65.02   1st Qu.: 8.352   1st Qu.:33.60  
##  Median :19.535   Median :68.19   Median :10.661   Median :35.40  
##  Mean   :19.688   Mean   :68.96   Mean   :11.348   Mean   :35.82  
##  3rd Qu.:21.933   3rd Qu.:72.54   3rd Qu.:13.739   3rd Qu.:37.90  
##  Max.   :33.188   Max.   :82.85   Max.   :23.523   Max.   :44.10  
##                                                                   
##    MedianAge2       AreaSqKM        PopDensity       PctBame     
##  Min.   :26.00   Min.   : 0.400   Min.   :  181   Min.   : 4.10  
##  1st Qu.:31.00   1st Qu.: 1.200   1st Qu.: 4500   1st Qu.:23.50  
##  Median :33.00   Median : 1.900   Median : 6600   Median :36.10  
##  Mean   :34.26   Mean   : 2.552   Mean   : 7930   Mean   :38.74  
##  3rd Qu.:37.00   3rd Qu.: 2.900   3rd Qu.:10500   3rd Qu.:52.30  
##  Max.   :46.00   Max.   :29.000   Max.   :27750   Max.   :93.70  
##                                                                  
##    PctNotBorn      PctNoEngli      GenFertRat       MaleLE0509   
##  Min.   : 5.10   Min.   : 0.40   Min.   : 21.79   Min.   :71.22  
##  1st Qu.:26.80   1st Qu.: 7.00   1st Qu.: 55.35   1st Qu.:75.91  
##  Median :37.30   Median :11.90   Median : 66.90   Median :78.03  
##  Mean   :36.05   Mean   :12.85   Mean   : 68.79   Mean   :78.38  
##  3rd Qu.:45.80   3rd Qu.:18.10   3rd Qu.: 79.63   3rd Qu.:80.35  
##  Max.   :68.50   Max.   :36.70   Max.   :130.20   Max.   :98.06  
##                                                                  
##    FemaleLE05      RateAmbula      RatesAmbul        InEmployme   
##  Min.   :75.91   Min.   : 68.7   Min.   : 0.1373   Min.   : 2443  
##  1st Qu.:81.41   1st Qu.:103.8   1st Qu.: 0.4303   1st Qu.: 5461  
##  Median :83.28   Median :119.6   Median : 0.5851   Median : 6247  
##  Mean   :83.60   Mean   :131.8   Mean   : 0.8593   Mean   : 6398  
##  3rd Qu.:85.41   3rd Qu.:143.4   3rd Qu.: 0.8543   3rd Qu.: 7216  
##  Max.   :99.55   Max.   :959.7   Max.   :26.8356   Max.   :13838  
##                                                                   
##    Employment      NoJobs2011       EmpWkAgePo         RateNINoFo      
##  Min.   :45.02   Min.   :   600   Min.   : 0.07639   Min.   :  0.7246  
##  1st Qu.:61.85   1st Qu.:  2100   1st Qu.: 0.25764   1st Qu.: 20.0000  
##  Median :65.95   Median :  3500   Median : 0.40203   Median : 38.8571  
##  Mean   :65.46   Mean   :  7091   Mean   : 0.80134   Mean   : 43.9887  
##  3rd Qu.:69.24   3rd Qu.:  6000   3rd Qu.: 0.67914   3rd Qu.: 60.4520  
##  Max.   :81.48   Max.   :382700   Max.   :50.02183   Max.   :149.1803  
##                                                                        
##    MeanHouseP        NoProperti    NoHousehol      PctDetache    
##  Min.   : 139270   Min.   : 25   Min.   : 2169   Min.   : 0.300  
##  1st Qu.: 238347   1st Qu.: 94   1st Qu.: 4570   1st Qu.: 2.200  
##  Median : 321992   Median :128   Median : 5335   Median : 4.300  
##  Mean   : 415938   Mean   :138   Mean   : 5420   Mean   : 6.598  
##  3rd Qu.: 453378   3rd Qu.:168   3rd Qu.: 6178   3rd Qu.: 7.400  
##  Max.   :4595285   Max.   :474   Max.   :12035   Max.   :55.800  
##                                                                  
##    PctSemiDet      PctTerrace      PctFlatMai      PctOwned20   
##  Min.   : 0.20   Min.   : 1.40   Min.   : 6.30   Min.   :11.90  
##  1st Qu.: 6.40   1st Qu.:13.30   1st Qu.:29.40   1st Qu.:34.30  
##  Median :16.90   Median :21.00   Median :46.10   Median :48.30  
##  Mean   :19.95   Mean   :23.65   Mean   :49.72   Mean   :49.67  
##  3rd Qu.:30.00   3rd Qu.:32.60   3rd Qu.:71.40   3rd Qu.:64.40  
##  Max.   :82.30   Max.   :63.80   Max.   :98.00   Max.   :90.80  
##                                                                 
##    PctSocialR      PctPrivate      PctSharedO        PctRentFre    
##  Min.   : 1.10   Min.   : 4.80   Min.   :0.04818   Min.   :0.5202  
##  1st Qu.:11.30   1st Qu.:17.30   1st Qu.:0.51520   1st Qu.:0.9385  
##  Median :20.40   Median :24.40   Median :0.95765   Median :1.1492  
##  Mean   :23.33   Mean   :24.45   Mean   :1.22841   Mean   :1.3102  
##  3rd Qu.:34.00   3rd Qu.:31.60   3rd Qu.:1.61262   3rd Qu.:1.4391  
##  Max.   :68.40   Max.   :55.50   Max.   :7.38835   Max.   :9.9948  
##                                                                    
##    PctCTaxBan        PctCTaxB_1       PctCTaxB_2       MortgageRe    
##  Min.   : 0.2654   Min.   : 5.316   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 5.4676   1st Qu.:59.107   1st Qu.: 1.490   1st Qu.: 0.000  
##  Median :11.8540   Median :69.181   Median : 8.135   Median :10.000  
##  Mean   :16.4961   Mean   :68.873   Mean   :15.255   Mean   : 8.696  
##  3rd Qu.:25.2222   3rd Qu.:80.993   3rd Qu.:24.458   3rd Qu.:15.000  
##  Max.   :64.3534   Max.   :98.020   Max.   :95.713   Max.   :70.000  
##                                                                      
##    LandlordRe       Incapacity        IncomeSupp        JSAClaiman     
##  Min.   :  0.00   Min.   : 0.2381   Min.   : 0.1786   Min.   : 0.4072  
##  1st Qu.: 20.00   1st Qu.: 2.8571   1st Qu.: 2.9936   1st Qu.: 3.3068  
##  Median : 40.00   Median : 4.0796   Median : 4.7333   Median : 5.4305  
##  Mean   : 43.22   Mean   : 4.2071   Mean   : 5.0141   Mean   : 6.1117  
##  3rd Qu.: 60.00   3rd Qu.: 5.3459   3rd Qu.: 6.7424   3rd Qu.: 8.3867  
##  Max.   :195.00   Max.   :12.6357   Max.   :16.1147   Max.   :23.4036  
##                                                                        
##    JSAClaim_1      PctDepChil       PctDepCh_1       PctHHNoAdu     
##  Min.   : 0.00   Min.   : 3.343   Min.   : 2.736   Min.   : 0.8666  
##  1st Qu.: 6.45   1st Qu.:21.610   1st Qu.:16.694   1st Qu.: 3.1171  
##  Median :10.43   Median :31.522   Median :26.265   Median : 5.1414  
##  Mean   :11.73   Mean   :32.404   Mean   :26.172   Mean   : 5.5876  
##  3rd Qu.:15.72   3rd Qu.:42.588   3rd Qu.:34.973   3rd Qu.: 7.4833  
##  Max.   :48.67   Max.   :63.041   Max.   :55.172   Max.   :15.5004  
##                                                                     
##    PctLonePar      IDRankLond      IDPctWorst       AvgGCSE201   
##  Min.   :20.82   Min.   :  1.0   Min.   :  0.00   Min.   :245.0  
##  1st Qu.:40.91   1st Qu.:157.0   1st Qu.: 30.00   1st Qu.:332.3  
##  Median :46.67   Median :313.0   Median : 83.33   Median :343.7  
##  Mean   :45.97   Mean   :313.5   Mean   : 65.55   Mean   :345.8  
##  3rd Qu.:51.36   3rd Qu.:470.0   3rd Qu.:100.00   3rd Qu.:358.3  
##  Max.   :75.63   Max.   :628.0   Max.   :100.00   Max.   :409.1  
##                                                                  
##    UnauthAbse       PctWithNoQ      PctLev4Qua      CrimeRate1     
##  Min.   :0.2463   Min.   : 3.80   Min.   :12.50   Min.   :  25.75  
##  1st Qu.:0.8215   1st Qu.:13.50   1st Qu.:27.30   1st Qu.:  64.09  
##  Median :1.1364   Median :17.40   Median :35.50   Median :  84.83  
##  Mean   :1.1286   Mean   :17.62   Mean   :37.66   Mean   : 101.05  
##  3rd Qu.:1.4105   3rd Qu.:21.60   3rd Qu.:47.00   3rd Qu.: 107.57  
##  Max.   :2.4675   Max.   :35.80   Max.   :68.70   Max.   :2100.20  
##                                                                    
##    ViolenceRa        RobberyRat       TheftAndHa        CriminalDa    
##  Min.   :  3.163   Min.   : 0.215   Min.   :   0.00   Min.   : 2.474  
##  1st Qu.: 11.058   1st Qu.: 2.116   1st Qu.:  20.90   1st Qu.: 6.512  
##  Median : 16.878   Median : 3.833   Median :  29.61   Median : 8.358  
##  Mean   : 18.737   Mean   : 4.580   Mean   :  41.69   Mean   : 8.962  
##  3rd Qu.: 23.082   3rd Qu.: 6.042   3rd Qu.:  40.74   3rd Qu.:10.783  
##  Max.   :215.882   Max.   :53.668   Max.   :1486.73   Max.   :42.613  
##                                                                       
##    DrugsRate1         Deliberate       PctOpenSpa      CarsPerHH2    
##  Min.   :  0.6987   Min.   :0.0000   Min.   : 0.00   Min.   :0.2333  
##  1st Qu.:  2.7723   1st Qu.:0.2000   1st Qu.:13.84   1st Qu.:0.5591  
##  Median :  5.0365   Median :0.4000   Median :23.51   Median :0.8184  
##  Mean   :  7.4460   Mean   :0.5402   Mean   :27.13   Mean   :0.8426  
##  3rd Qu.:  8.5567   3rd Qu.:0.7000   3rd Qu.:36.85   3rd Qu.:1.1088  
##  Max.   :190.9804   Max.   :3.5000   Max.   :88.80   Max.   :1.7051  
##                                                                      
##    AvgPubTran      TurnoutMay          ID        x.x        y.x     
##  Min.   :1.250   Min.   :19.30   Min.   :  0   NA's:625   NA's:625  
##  1st Qu.:2.584   1st Qu.:30.90   1st Qu.:156                        
##  Median :3.221   Median :34.22   Median :312                        
##  Mean   :3.639   Mean   :34.14   Mean   :312                        
##  3rd Qu.:4.377   3rd Qu.:37.64   3rd Qu.:468                        
##  Max.   :7.950   Max.   :51.72   Max.   :624                        
##                                                                     
##   WardName.y         WardCode.y        PctSharedOwnership2011
##  Length:625         Length:625         Min.   :0.04818       
##  Class :character   Class :character   1st Qu.:0.51520       
##  Mode  :character   Mode  :character   Median :0.95765       
##                                        Mean   :1.22841       
##                                        3rd Qu.:1.61262       
##                                        Max.   :7.38835       
##                                                              
##  PctRentFree2011   Candidate          InnerOuter             x.y        
##  Min.   :0.5202   Length:625         Length:625         Min.   :505213  
##  1st Qu.:0.9385   Class :character   Class :character   1st Qu.:523226  
##  Median :1.1492   Mode  :character   Mode  :character   Median :530429  
##  Mean   :1.3102                                         Mean   :530354  
##  3rd Qu.:1.4391                                         3rd Qu.:537695  
##  Max.   :9.9948                                         Max.   :557694  
##                                                                         
##       y.y          AvgGCSE2011    UnauthAbsenceSchools11
##  Min.   :157876   Min.   :245.0   Min.   :0.2463        
##  1st Qu.:174242   1st Qu.:332.3   1st Qu.:0.8215        
##  Median :180932   Median :343.7   Median :1.1364        
##  Mean   :180131   Mean   :345.8   Mean   :1.1286        
##  3rd Qu.:186503   3rd Qu.:358.3   3rd Qu.:1.4105        
##  Max.   :199314   Max.   :409.1   Max.   :2.4675        
##                                                         
##           geometry  
##  MULTIPOLYGON :625  
##  epsg:NA      :  0  
##  +proj=tmer...:  0  
##                     
##                     
##                     
## 
# List the column names with their positions; the numeric column indices used
# further down refer to this ordering.
names(LondonWardsSF)
##  [1] "WD11CD"                 "WD11CDO"               
##  [3] "WD11NM"                 "WD11NMW"               
##  [5] "WardName.x"             "WardCode.x"            
##  [7] "Wardcode1"              "PopCensus2"            
##  [9] "Aged0_15"               "Aged16_64"             
## [11] "Aged65plus"             "PctAged0_1"            
## [13] "PctAged16_"             "PctAged65p"            
## [15] "MeanAge201"             "MedianAge2"            
## [17] "AreaSqKM"               "PopDensity"            
## [19] "PctBame"                "PctNotBorn"            
## [21] "PctNoEngli"             "GenFertRat"            
## [23] "MaleLE0509"             "FemaleLE05"            
## [25] "RateAmbula"             "RatesAmbul"            
## [27] "InEmployme"             "Employment"            
## [29] "NoJobs2011"             "EmpWkAgePo"            
## [31] "RateNINoFo"             "MeanHouseP"            
## [33] "NoProperti"             "NoHousehol"            
## [35] "PctDetache"             "PctSemiDet"            
## [37] "PctTerrace"             "PctFlatMai"            
## [39] "PctOwned20"             "PctSocialR"            
## [41] "PctPrivate"             "PctSharedO"            
## [43] "PctRentFre"             "PctCTaxBan"            
## [45] "PctCTaxB_1"             "PctCTaxB_2"            
## [47] "MortgageRe"             "LandlordRe"            
## [49] "Incapacity"             "IncomeSupp"            
## [51] "JSAClaiman"             "JSAClaim_1"            
## [53] "PctDepChil"             "PctDepCh_1"            
## [55] "PctHHNoAdu"             "PctLonePar"            
## [57] "IDRankLond"             "IDPctWorst"            
## [59] "AvgGCSE201"             "UnauthAbse"            
## [61] "PctWithNoQ"             "PctLev4Qua"            
## [63] "CrimeRate1"             "ViolenceRa"            
## [65] "RobberyRat"             "TheftAndHa"            
## [67] "CriminalDa"             "DrugsRate1"            
## [69] "Deliberate"             "PctOpenSpa"            
## [71] "CarsPerHH2"             "AvgPubTran"            
## [73] "TurnoutMay"             "ID"                    
## [75] "x.x"                    "y.x"                   
## [77] "WardName.y"             "WardCode.y"            
## [79] "PctSharedOwnership2011" "PctRentFree2011"       
## [81] "Candidate"              "InnerOuter"            
## [83] "x.y"                    "y.y"                   
## [85] "AvgGCSE2011"            "UnauthAbsenceSchools11"
## [87] "geometry"

London data store for data key=value reference https://londondatastore-upload.s3.amazonaws.com/instant-atlas/borough-profiles/atlas.html

Simple histogram

### Shared plot theme: Tufte style drawn with the Georgia font family
th <- theme_tufte(base_family = "Georgia")

### AvgPubTran = Average Public Transport Accessibility score, 2014
ggplot(data = LondonWardsSF, mapping = aes(x = AvgPubTran)) +
  geom_histogram() +
  th
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Simple histogram binwidth = 0.1

# Same histogram, but with an explicit bin width of 0.1 instead of 30 bins
ggplot(data = LondonWardsSF, mapping = aes(x = AvgPubTran)) +
  geom_histogram(binwidth = 0.1) +
  th

Histogram with vertical lines Central Tendency

# Histogram stored as CT so later steps can add layers to it; the mean is
# marked in yellow and the median in magenta.
CT <- ggplot(data = LondonWardsSF, mapping = aes(x = AvgPubTran)) +
  geom_histogram(binwidth = 0.1) +
  th +
  geom_vline(mapping = aes(xintercept = mean(AvgPubTran)), colour = "yellow") +
  geom_vline(mapping = aes(xintercept = median(AvgPubTran)), colour = "magenta")

Calculate Mode

### Round PTAL to one decimal place so that a 'most typical' value exists
### (the unrounded scores would otherwise all be unique)
x <- round(LondonWardsSF[["AvgPubTran"]], digits = 1)
### Frequency of each rounded score
y <- table(x)
### Label(s) of the most frequent score; ties return every tied value
names(y)[y == max(y)]
## [1] "2.4"

Histogram with Mode

# Add the mode found above (2.4) to the histogram as a cyan line
CT +
  geom_vline(mapping = aes(xintercept = 2.4), colour = "cyan")

Annotate Histogram

# Text label for the mean, placed at the mean on the x axis
m_ean <- annotate(
  "text",
  label = paste("Mean\n", round(mean(LondonWardsSF$AvgPubTran), digits = 2)),
  x = mean(LondonWardsSF$AvgPubTran),
  y = 7,
  color = "white",
  size = 3,
  family = "Georgia",
  hjust = -0.01
)
# Text label for the median, placed at the median on the x axis
m_edian <- annotate(
  "text",
  label = paste("Median\n", round(median(LondonWardsSF$AvgPubTran), digits = 2)),
  x = median(LondonWardsSF$AvgPubTran),
  y = 3,
  color = "white",
  size = 3,
  family = "Georgia",
  hjust = -0.01
)
# Text label for the mode (2.4), rotated to run along its vertical line
m_ode <- annotate(
  "text",
  label = paste("Mode: ", 2.4),
  x = 2.4,
  y = 3,
  color = "white",
  size = 3,
  family = "Georgia",
  angle = 90,
  vjust = -0.5
)

# Histogram with mean/median lines, the mode line and all three labels
CT +
  geom_vline(mapping = aes(xintercept = 2.4), colour = "cyan") +
  m_ean +
  m_edian +
  m_ode

Kernel density smoothed frequency gradients

# Density-scaled histogram overlaid with three kernel density curves that
# differ only in bandwidth adjustment: default (red), 1/5 (orange), 1/3 (purple)
ggplot(data = LondonWardsSF, mapping = aes(x = AvgPubTran, y = ..density..)) +
  geom_histogram(mapping = aes(y = ..density..)) +
  geom_density(colour = "red", fill = "red", alpha = 0.2) +
  geom_density(colour = "orange", fill = "orange", alpha = 0.2, adjust = 1/5) +
  geom_density(colour = "purple", fill = "purple", alpha = 0.2, adjust = 1/3) +
  th
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Different Kernels

# Same bandwidth adjustment (1/3) throughout; only the kernel changes:
# gaussian (red), rectangular (orange), cosine (purple)
ggplot(data = LondonWardsSF, mapping = aes(x = AvgPubTran, y = ..density..)) +
  geom_histogram(mapping = aes(y = ..density..)) +
  geom_density(
    colour = "red", fill = "red", alpha = 0.2,
    adjust = 1/3, kernel = "gaussian"
  ) +
  geom_density(
    colour = "orange", fill = "orange", alpha = 0.2,
    adjust = 1/3, kernel = "rectangular"
  ) +
  geom_density(
    colour = "purple", fill = "purple", alpha = 0.2,
    adjust = 1/3, kernel = "cosine"
  ) +
  th
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Boxplot

# Tufte-style boxplot of the PTAL score; x is a dummy constant because there
# is only a single group
bp <- ggplot(data = LondonWardsSF, mapping = aes(x = 1, y = AvgPubTran)) +
  theme_tufte() +
  geom_tufteboxplot()
bp

Annotate Boxplot

# Tukey five-number summary of the PTAL score
fn <- fivenum(LondonWardsSF$AvgPubTran)
# One text label per summary value, placed at that value on the y axis
an <- annotate(
  geom = "text",
  label = round(fn, digits = 2),
  x = 1,
  y = fn,
  size = 3,
  family = "Georgia",
  hjust = -0.2
)
# Labelled boxplot with the axis titles and the x axis ticks/text removed
bp +
  an +
  theme(
    axis.title = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

Faceted grid

# Check which variables are numeric first: list1 holds one row per column with
# its class and its position, so columns can be picked by index below.
list1 <- as.data.frame(cbind(lapply(LondonWardsSF, class)))
list1 <- cbind(list1, seq.int(nrow(list1)))

# Leave out columns 74:82 (ID, the empty x.x / y.x, and the duplicated or
# character columns that came in with the join). Subsetting an sf object with
# `[` keeps its geometry column, so LondonSub is still spatial.
LondonSub <- LondonWardsSF[,c(1:73,83:86)]
# melt() needs a plain data frame, so the geometry is dropped explicitly here.
# The variables are split into three subsets (each keeping the three id
# columns) so that the facets stay readable.
LondonSub2 <- st_set_geometry(LondonWardsSF[,c(1:3,9:27)],NULL)
LondonSub3 <- st_set_geometry(LondonWardsSF[,c(1:3,28:50)],NULL)
LondonSub4 <- st_set_geometry(LondonWardsSF[,c(1:3,51:73,85:86)],NULL)

# Reshape to long format (one row per ward and variable) and draw one
# density-scaled histogram per variable. The data frame is passed to ggplot()
# explicitly, so attach() is not needed; attaching only pollutes the search
# path and triggers masking messages.
LondonMelt2 <- melt(LondonSub2, id.vars = 1:3)
hist2 <- ggplot(LondonMelt2, aes(x=value)) + geom_histogram(aes(y = ..density..)) + geom_density(colour="cyan", size=1, adjust=1) + theme_tufte()
hist2 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

LondonMelt3 <- melt(LondonSub3, id.vars = 1:3)
hist3 <- ggplot(LondonMelt3, aes(x=value)) + geom_histogram(aes(y = ..density..)) + geom_density(colour="cyan", size=1, adjust=1) + theme_tufte()
hist3 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

LondonMelt4 <- melt(LondonSub4, id.vars = 1:3)
hist4 <- ggplot(LondonMelt4, aes(x=value)) + geom_histogram(aes(y = ..density..)) + geom_density(colour="cyan", size=.7, adjust=1) + theme_tufte()
hist4 + facet_wrap(~ variable, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Log10() transformation on the x variables

# Histograms of the log10-transformed values, one facet per variable.
# NOTE(review): stat_function(fun = dnorm) is given no mean/sd, so the cyan
# curve is the standard normal rather than a curve fitted to each variable.
hist5 <- ggplot(data = LondonMelt4, mapping = aes(x = log10(value))) +
  geom_histogram(mapping = aes(y = ..density..)) +
  stat_function(fun = dnorm, colour = "cyan", size = 0.5) +
  theme_tufte()
hist5 +
  facet_wrap(~ variable, scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 90 rows containing non-finite values (stat_bin).

spatial Kernel Density Estimate

# Scatter of the ward x.y / y.y coordinates with an equal-aspect coordinate
# system
londonpoint <- ggplot(data = LondonSub, mapping = aes(x = x.y, y = y.y)) +
  geom_point() +
  coord_equal() +
  theme_tufte()
londonpoint

library(viridis)
## Warning: package 'viridis' was built under R version 3.4.4
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 3.4.3
# Counts of ward points in a 10 x 10 grid of rectangular bins
londonpoint <- ggplot(data = LondonSub, mapping = aes(x = x.y, y = y.y)) +
  stat_bin2d(bins = 10) +
  theme_tufte() +
  scale_fill_viridis()
londonpoint

# Back to the plain point plot, used as the base for the density surface
londonpoint <- ggplot(data = LondonSub, mapping = aes(x = x.y, y = y.y)) +
  geom_point() +
  coord_equal() +
  theme_tufte()
londonpoint

# 2D kernel density estimate drawn as filled contour polygons over the points
londonpoint +
  stat_density2d(mapping = aes(fill = ..level..), geom = "polygon") +
  theme_tufte() +
  scale_fill_viridis()

Introduction to functions in R
Structure of a function

# Skeleton showing the general shape of an R function definition: a name, an
# argument list (`...` stands for any further arguments), a body and a
# returned value. `statements` and `object` are placeholders that are not
# defined anywhere in this script, so this is a template to adapt rather than
# a function to call.
myfunction <- function(arg1, arg2, ... ){
  statements
  return(object)
}

Re-classify data (recode data)

# Kept so the global `newvar` still exists for anything that refers to it;
# recode() below builds its own result vector and no longer reads this value.
newvar<-0

# Recode a numeric vector into the labels "High", "Medium" and "Low".
#
# variable: numeric vector to classify.
# high, medium, low: upper bounds of the three classes (expected to satisfy
#   low <= medium <= high).
#
# Returns a character vector of the same length as `variable`:
#   "Low"    where variable <= low
#   "Medium" where low < variable <= medium
#   "High"   where medium < variable <= high
#   NA       where variable > high or variable is NA
recode<-function(variable,high,medium,low){
  # Start from an all-NA vector of the right length. Growing a length-one
  # global instead could return a result shorter than the input and leave a
  # stray "0" in the first position.
  newvar <- rep(NA_character_, length(variable))
  # Apply the widest class first; each narrower class then overwrites it.
  # which() drops NA comparisons so missing inputs stay NA.
  newvar[which(variable <= high)] <- "High"
  newvar[which(variable <= medium)] <- "Medium"
  newvar[which(variable <= low)] <- "Low"
  newvar
}

# First we initialise a new variable called newvar and set it to 0. We then define a new function called recode. This takes in 4 pieces of information: a variable (called variable, but it could have been called anything) and three values called high, medium and low. It returns a new character variable, newvar, whose entries are set according to the values of high, medium and low that are given to the function.

We can now use this function to recode any of our continuous variables into high, medium and low values based on the values we enter into the function.

summary(LondonWardsSF$AvgGCSE201)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   245.0   332.3   343.7   345.8   358.3   409.1
# Class boundaries: Low = up to the 1st quartile, Medium = up to the 3rd
# quartile, High = up to the maximum. The thresholds are computed from the
# data instead of being copied from the summary() printout above, because
# that printout is rounded and a ward just above a rounded value would be
# misclassified or left unclassified.
LondonWardsSF$GCSE_recode <- local({
  gcse <- LondonWardsSF$AvgGCSE201
  quartiles <- quantile(gcse, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  recode(gcse, max(gcse, na.rm = TRUE), quartiles[2], quartiles[1])
})

Location Quotient

# Location quotient, version 1: for data already expressed as percentages.
# Each value is divided by the mean of the whole vector, so 1 means "equal to
# the average", above 1 over-represented and below 1 under-represented.
LQ1 <- function(pctVariable) {
  overall_mean <- mean(pctVariable)
  pctVariable / overall_mean
}

# Location quotient, version 2: for raw counts.
# The local share (variable / rowtotal) of each row is divided by the global
# share (sum of variable / sum of rowtotal).
LQ2 <- function(variable, rowtotal) {
  (variable / rowtotal) / (sum(variable) / sum(rowtotal))
}

Calculate Location Quotients for the 5 Housing tenure variables (Owner Occupied, Private Rent, Social Rent, Shared Ownership, Rent Free)

# List the column names again to look up the tenure percentage columns used
# for the location quotients below.
names(LondonWardsSF)
##  [1] "WD11CD"                 "WD11CDO"               
##  [3] "WD11NM"                 "WD11NMW"               
##  [5] "WardName.x"             "WardCode.x"            
##  [7] "Wardcode1"              "PopCensus2"            
##  [9] "Aged0_15"               "Aged16_64"             
## [11] "Aged65plus"             "PctAged0_1"            
## [13] "PctAged16_"             "PctAged65p"            
## [15] "MeanAge201"             "MedianAge2"            
## [17] "AreaSqKM"               "PopDensity"            
## [19] "PctBame"                "PctNotBorn"            
## [21] "PctNoEngli"             "GenFertRat"            
## [23] "MaleLE0509"             "FemaleLE05"            
## [25] "RateAmbula"             "RatesAmbul"            
## [27] "InEmployme"             "Employment"            
## [29] "NoJobs2011"             "EmpWkAgePo"            
## [31] "RateNINoFo"             "MeanHouseP"            
## [33] "NoProperti"             "NoHousehol"            
## [35] "PctDetache"             "PctSemiDet"            
## [37] "PctTerrace"             "PctFlatMai"            
## [39] "PctOwned20"             "PctSocialR"            
## [41] "PctPrivate"             "PctSharedO"            
## [43] "PctRentFre"             "PctCTaxBan"            
## [45] "PctCTaxB_1"             "PctCTaxB_2"            
## [47] "MortgageRe"             "LandlordRe"            
## [49] "Incapacity"             "IncomeSupp"            
## [51] "JSAClaiman"             "JSAClaim_1"            
## [53] "PctDepChil"             "PctDepCh_1"            
## [55] "PctHHNoAdu"             "PctLonePar"            
## [57] "IDRankLond"             "IDPctWorst"            
## [59] "AvgGCSE201"             "UnauthAbse"            
## [61] "PctWithNoQ"             "PctLev4Qua"            
## [63] "CrimeRate1"             "ViolenceRa"            
## [65] "RobberyRat"             "TheftAndHa"            
## [67] "CriminalDa"             "DrugsRate1"            
## [69] "Deliberate"             "PctOpenSpa"            
## [71] "CarsPerHH2"             "AvgPubTran"            
## [73] "TurnoutMay"             "ID"                    
## [75] "x.x"                    "y.x"                   
## [77] "WardName.y"             "WardCode.y"            
## [79] "PctSharedOwnership2011" "PctRentFree2011"       
## [81] "Candidate"              "InnerOuter"            
## [83] "x.y"                    "y.y"                   
## [85] "AvgGCSE2011"            "UnauthAbsenceSchools11"
## [87] "geometry"               "GCSE_recode"
# Location quotients for the five housing tenure percentages: owner occupied,
# private rent, social rent, shared ownership and rent free
LondonWardsSF$Owner_occ <- LQ1(LondonWardsSF[["PctOwned20"]])
LondonWardsSF$PRent     <- LQ1(LondonWardsSF[["PctPrivate"]])
LondonWardsSF$SRent     <- LQ1(LondonWardsSF[["PctSocialR"]])
LondonWardsSF$PShared   <- LQ1(LondonWardsSF[["PctSharedO"]])
LondonWardsSF$RFree     <- LQ1(LondonWardsSF[["PctRentFre"]])

Location Quotient Mapper function by A.Dennett

#############################################################
##A Function for creating various location quotient maps
##
##By Adam Dennett October 2014 - updated November 2018
##
##Please note, this function requires input data to already be in ##the form of row percentages. To create the function, highlight the ##whole block of code and run it. To run the function, simply use  ##LQMapper(your_dataframe)

library(rgeos)
## Warning: package 'rgeos' was built under R version 3.4.4
## rgeos version: 0.3-28, (SVN revision 572)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 r0 
##  Linking to sp version: 1.2-7 
##  Polygon checking: TRUE
library(ggplot2)
library(maptools)
## Warning: package 'maptools' was built under R version 3.4.4
## Checking rgeos availability: TRUE
library(sf)
library(tmap)
## Warning: package 'tmap' was built under R version 3.4.4
# Global copy of the ward data under the same name as LQMapper()'s parameter.
# LQMapper() works on whatever is passed to it, so this copy appears to be
# only a convenience for running the function body line by line.
sfdataframe <- LondonWardsSF

# Interactively compute and map location quotients.
#
# Prints the column names of `sfdataframe`, asks the user for a
# space-separated list of column names, appends one "LQ_<name>" column per
# selected column (each value divided by the column mean), then draws one
# tmap choropleth per new column and saves it as "LQ_<name>.png" in the
# working directory.
#
# sfdataframe: an sf data frame whose selected columns already hold row
#   percentages.
#
# Returns `sfdataframe` with the additional LQ_ columns. Stops with an error
# if a selected name is not a column or is not numeric. If nothing is
# selected, the input is returned unchanged.
LQMapper<-function(sfdataframe){
  print(colnames(sfdataframe))  
  vars<-readline("From the list above, select the variables 
                 you want to calculate location quotients for 
                 separated by spaces...")

  # split the answer at whitespace; repeated spaces produce empty tokens,
  # which are discarded
  vars <- unlist(strsplit(vars, split = "\\s"))
  vars <- vars[nzchar(vars)]

  # fail early with a readable message rather than deep inside the loop
  unknown <- setdiff(vars, colnames(sfdataframe))
  if (length(unknown) > 0) {
    stop("Unknown column(s): ", paste(unknown, collapse = ", "), call. = FALSE)
  }
  not_numeric <- vars[!vapply(vars, function(v) is.numeric(sfdataframe[[v]]), logical(1))]
  if (length(not_numeric) > 0) {
    stop("Non-numeric column(s): ", paste(not_numeric, collapse = ", "), call. = FALSE)
  }

  # this is a little function to calculate location quotients
  LQ <- function(pctVariable) {
    pctVariable / mean(pctVariable)
  }

  print("looping to create new location quotient variables...")
  # columns are read straight from the data frame, so no attach() is needed
  for (pctVariable in vars) {
    sfdataframe[, paste0("LQ_", pctVariable)] <- LQ(sfdataframe[[pctVariable]])
  }

  print("now entering the plotting loop")
  for (pctVariable in paste0("LQ_", vars)) {
    print("I'm plotting")

    # create the plot
    LQMapperPlot <- tm_shape(sfdataframe) + tm_polygons(pctVariable,
                        style="jenks",
                        palette="Spectral",
                        midpoint=1,
                        title=pctVariable,
                        alpha = 0.5)

    # objects are not auto-printed inside a loop, so display the map explicitly
    print(LQMapperPlot)
    # save the plot to a png and give it a name based on its variable
    tmap_save(LQMapperPlot, filename = paste0(pctVariable, ".png"))
  }

  sfdataframe
}

###################################################################
# Interactive alternative, left commented out: it prompts for column names on
# the console.
#LQMapper(LondonWardsSF)
# Choropleth of SRent (the location quotient computed from PctSocialR above),
# classed with Jenks breaks on the PuRd palette, with near-transparent white
# ward borders. The legend title is the name of the source percentage column.
tm_shape(LondonWardsSF) + 
  tm_polygons("SRent", 
              style="jenks",
              palette="PuRd",
              midpoint=1, 
              title="PctSocialR", 
              border.col="white",
              border.alpha = 0.01)

Basic Geodemographic Classification
In a cluster analysis, you should select variables that are: * Ranged on the same scale * Normally distributed * Not highly correlated

# Drop the geometry column to get a plain attribute table
LondonWardsDF <- st_set_geometry(x = LondonWardsSF, value = NULL)
# Display every variable alongside its class
cbind(lapply(X = LondonWardsDF, FUN = class))
##                        [,1]       
## WD11CD                 "factor"   
## WD11CDO                "factor"   
## WD11NM                 "factor"   
## WD11NMW                "factor"   
## WardName.x             "factor"   
## WardCode.x             "factor"   
## Wardcode1              "factor"   
## PopCensus2             "numeric"  
## Aged0_15               "numeric"  
## Aged16_64              "numeric"  
## Aged65plus             "numeric"  
## PctAged0_1             "numeric"  
## PctAged16_             "numeric"  
## PctAged65p             "numeric"  
## MeanAge201             "numeric"  
## MedianAge2             "numeric"  
## AreaSqKM               "numeric"  
## PopDensity             "numeric"  
## PctBame                "numeric"  
## PctNotBorn             "numeric"  
## PctNoEngli             "numeric"  
## GenFertRat             "numeric"  
## MaleLE0509             "numeric"  
## FemaleLE05             "numeric"  
## RateAmbula             "numeric"  
## RatesAmbul             "numeric"  
## InEmployme             "numeric"  
## Employment             "numeric"  
## NoJobs2011             "numeric"  
## EmpWkAgePo             "numeric"  
## RateNINoFo             "numeric"  
## MeanHouseP             "numeric"  
## NoProperti             "numeric"  
## NoHousehol             "numeric"  
## PctDetache             "numeric"  
## PctSemiDet             "numeric"  
## PctTerrace             "numeric"  
## PctFlatMai             "numeric"  
## PctOwned20             "numeric"  
## PctSocialR             "numeric"  
## PctPrivate             "numeric"  
## PctSharedO             "numeric"  
## PctRentFre             "numeric"  
## PctCTaxBan             "numeric"  
## PctCTaxB_1             "numeric"  
## PctCTaxB_2             "numeric"  
## MortgageRe             "numeric"  
## LandlordRe             "numeric"  
## Incapacity             "numeric"  
## IncomeSupp             "numeric"  
## JSAClaiman             "numeric"  
## JSAClaim_1             "numeric"  
## PctDepChil             "numeric"  
## PctDepCh_1             "numeric"  
## PctHHNoAdu             "numeric"  
## PctLonePar             "numeric"  
## IDRankLond             "numeric"  
## IDPctWorst             "numeric"  
## AvgGCSE201             "numeric"  
## UnauthAbse             "numeric"  
## PctWithNoQ             "numeric"  
## PctLev4Qua             "numeric"  
## CrimeRate1             "numeric"  
## ViolenceRa             "numeric"  
## RobberyRat             "numeric"  
## TheftAndHa             "numeric"  
## CriminalDa             "numeric"  
## DrugsRate1             "numeric"  
## Deliberate             "numeric"  
## PctOpenSpa             "numeric"  
## CarsPerHH2             "numeric"  
## AvgPubTran             "numeric"  
## TurnoutMay             "numeric"  
## ID                     "integer"  
## x.x                    "factor"   
## y.x                    "factor"   
## WardName.y             "character"
## WardCode.y             "character"
## PctSharedOwnership2011 "numeric"  
## PctRentFree2011        "numeric"  
## Candidate              "character"
## InnerOuter             "character"
## x.y                    "numeric"  
## y.y                    "numeric"  
## AvgGCSE2011            "numeric"  
## UnauthAbsenceSchools11 "numeric"  
## GCSE_recode            "character"
## Owner_occ              "numeric"  
## PRent                  "numeric"  
## SRent                  "numeric"  
## PShared                "numeric"  
## RFree                  "numeric"

Create new Dataframe with 2 variables

# Build the two-column input for the clustering from the CarsPerHH2 and
# PctAged65p columns of LondonWardsDF.
GD <- as.data.frame(LondonWardsDF[, c("CarsPerHH2", "PctAged65p")])
mean(LondonWardsDF$CarsPerHH2)
## [1] 0.8426429
median(LondonWardsDF$CarsPerHH2)
## [1] 0.818353
mean(LondonWardsDF$PctAged65p)
## [1] 11.34829
median(LondonWardsDF$PctAged65p)
## [1] 10.66135
# NOTE(review): the two variables are on very different scales (means of
# about 0.84 and 11.3 above), so PctAged65p will dominate the distances
# k-means uses. Consider clustering on scale(GD) instead -- confirm before
# changing, as it alters the published results.

# kmeans() chooses its starting centres at random; fixing the seed keeps the
# cluster numbering (and so the colours in the plots below) stable across runs.
set.seed(1)
# run a k-means to find 3 clusters; nstart = 25 means 25 random sets of
# starting centres are tried and the best solution is kept
fit <- kmeans(GD, centers = 3, nstart = 25)
# get cluster means (aggregate() names the grouping column Group.1)
centroid <- aggregate(GD, by = list(fit$cluster), FUN = mean)
# print the results of the cluster groupings
centroid

# store the cluster membership alongside the data so the plot can refer to it
# as a column rather than reaching into `fit` from inside aes()
GD$cluster <- fit$cluster

# as we only have two dimensions we can plot the clusters on a graph;
# the large diamonds (shape 18) mark the cluster centroids
p <- ggplot(GD, aes(CarsPerHH2, PctAged65p))
p +
  geom_point(aes(colour = factor(cluster))) +
  geom_point(
    data = centroid[, 2:3],
    aes(CarsPerHH2, PctAged65p),
    size = 7,
    shape = 18
  ) +
  theme(legend.position = "none")

# add the cluster groups to the LondonWards data frame
LondonWardsSF$cluster <- GD$cluster
# now map our geodemographic classification; cluster ids are labels, not
# quantities, so they are mapped as a factor onto a discrete viridis scale
map <- ggplot(LondonWardsSF) +
  geom_sf(mapping = aes(fill = factor(cluster))) +
  theme_tufte() +
  scale_fill_viridis(discrete = TRUE, name = "cluster")
map